/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.catalina.util;

import java.util.*;

/**
 * MIME2Java is a convenience class which handles conversions between MIME
 * charset names and Java encoding names.
 * <p>
 * The supported XML encodings are the intersection of XML-supported code sets
 * and those supported in JDK 1.1.
 * <p>
 * MIME charset names are used on <var>xmlEncoding</var> parameters to methods
 * such as <code>TXDocument#setEncoding</code> and <code>DTD#setEncoding</code>.
 * <p>
 * Java encoding names are used on <var>encoding</var> parameters to methods
 * such as <code>TXDocument#printWithFormat</code> and
 * <code>DTD#printExternal</code>.
 * <P>
 * <TABLE BORDER="0" WIDTH="100%">
 * <TR>
 * <TD WIDTH="33%">
 * <P ALIGN="CENTER"><B>Common Name</B></TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER"><B>Use this name in XML files</B></TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER"><B>Name Type</B></TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER"><B>Xerces converts to this Java Encoder Name</B></TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">8 bit Unicode</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">UTF-8</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">UTF8</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">ISO Latin 1</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ISO-8859-1</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">8859_1</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">ISO Latin 2</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ISO-8859-2</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">8859_2</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">ISO Latin 3</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ISO-8859-3</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">8859_3</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">ISO Latin 4</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ISO-8859-4</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">8859_4</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">ISO Latin Cyrillic</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ISO-8859-5</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">8859_5</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">ISO Latin Arabic</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ISO-8859-6</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">8859_6</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">ISO Latin Greek</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ISO-8859-7</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">8859_7</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">ISO Latin Hebrew</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ISO-8859-8</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">8859_8</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">ISO Latin 5</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ISO-8859-9</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">8859_9</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: US</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-us</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp037</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Canada</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-ca</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp037</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Netherlands</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-nl</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp037</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Denmark</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-dk</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp277</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Norway</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-no</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp277</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Finland</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-fi</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp278</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Sweden</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-se</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp278</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Italy</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-it</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp280</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Spain, Latin America</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-es</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp284</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Great Britain</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-gb</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp285</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: France</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-fr</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp297</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Arabic</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-ar1</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp420</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Hebrew</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-he</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp424</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Switzerland</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-ch</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp500</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Roece</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-roece</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp870</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Yugoslavia</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-yu</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp870</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Iceland</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-is</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp871</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">EBCDIC: Urdu</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">ebcdic-cp-ar2</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">IANA</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">cp918</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">Chinese for PRC, mixed 1/2 byte</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">gb2312</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">GB2312</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">Extended Unix Code, packed for Japanese</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">euc-jp</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">eucjis</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">Japanese: iso-2022-jp</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">iso-2022-jp</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">JIS</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">Japanese: Shift JIS</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">Shift_JIS</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">SJIS</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">Chinese: Big5</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">Big5</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">Big5</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">Extended Unix Code, packed for Korean</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">euc-kr</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">KSC5601</TD>
 * </TR>
 * <TR>
 * <TD WIDTH="33%">Cyrillic</TD>
 * <TD WIDTH="15%">
 * <P ALIGN="CENTER">koi8-r</TD>
 * <TD WIDTH="12%">
 * <P ALIGN="CENTER">MIME</TD>
 * <TD WIDTH="31%">
 * <P ALIGN="CENTER">KOI8_R</TD>
 * </TR>
 * </TABLE>
 * 
 * @version $Id: MIME2Java.java 939353 2010-04-29 15:50:43Z kkolinko $
 * @author TAMURA Kent &lt;kent@trl.ibm.co.jp&gt;
 */
public class MIME2Java {

	/**
	 * Upper-cased MIME charset name to Java encoding name. Filled once by the
	 * static initializer below and only read afterwards.
	 */
	private static final Map<String, String> s_enchash = new HashMap<String, String>();

	/**
	 * Upper-cased Java encoding name to MIME charset name. Filled once by the
	 * static initializer below and only read afterwards.
	 */
	private static final Map<String, String> s_revhash = new HashMap<String, String>();

	static {
		// <preferred MIME name>, <Java encoding name>
		s_enchash.put("UTF-8", "UTF8");
		s_enchash.put("US-ASCII", "8859_1"); // ?
		s_enchash.put("ISO-8859-1", "8859_1");
		s_enchash.put("ISO-8859-2", "8859_2");
		s_enchash.put("ISO-8859-3", "8859_3");
		s_enchash.put("ISO-8859-4", "8859_4");
		s_enchash.put("ISO-8859-5", "8859_5");
		s_enchash.put("ISO-8859-6", "8859_6");
		s_enchash.put("ISO-8859-7", "8859_7");
		s_enchash.put("ISO-8859-8", "8859_8");
		s_enchash.put("ISO-8859-9", "8859_9");
		s_enchash.put("ISO-2022-JP", "JIS");
		s_enchash.put("SHIFT_JIS", "SJIS");
		s_enchash.put("EUC-JP", "EUCJIS");
		s_enchash.put("GB2312", "GB2312");
		s_enchash.put("BIG5", "Big5");
		s_enchash.put("EUC-KR", "KSC5601");
		s_enchash.put("ISO-2022-KR", "ISO2022KR");
		s_enchash.put("KOI8-R", "KOI8_R");

		s_enchash.put("EBCDIC-CP-US", "CP037");
		s_enchash.put("EBCDIC-CP-CA", "CP037");
		s_enchash.put("EBCDIC-CP-NL", "CP037");
		s_enchash.put("EBCDIC-CP-DK", "CP277");
		s_enchash.put("EBCDIC-CP-NO", "CP277");
		s_enchash.put("EBCDIC-CP-FI", "CP278");
		s_enchash.put("EBCDIC-CP-SE", "CP278");
		s_enchash.put("EBCDIC-CP-IT", "CP280");
		s_enchash.put("EBCDIC-CP-ES", "CP284");
		s_enchash.put("EBCDIC-CP-GB", "CP285");
		s_enchash.put("EBCDIC-CP-FR", "CP297");
		s_enchash.put("EBCDIC-CP-AR1", "CP420");
		s_enchash.put("EBCDIC-CP-HE", "CP424");
		s_enchash.put("EBCDIC-CP-CH", "CP500");
		s_enchash.put("EBCDIC-CP-ROECE", "CP870");
		s_enchash.put("EBCDIC-CP-YU", "CP870");
		s_enchash.put("EBCDIC-CP-IS", "CP871");
		s_enchash.put("EBCDIC-CP-AR2", "CP918");

		// j:CNS11643 -> EUC-TW?
		// ISO-2022-CN? ISO-2022-CN-EXT?

		// <Java encoding name>, <preferred MIME name>
		s_revhash.put("UTF8", "UTF-8");
		// "8859_1" is also the target of US-ASCII above; ISO-8859-1 is the
		// name reported back.
		s_revhash.put("8859_1", "ISO-8859-1");
		s_revhash.put("8859_2", "ISO-8859-2");
		s_revhash.put("8859_3", "ISO-8859-3");
		s_revhash.put("8859_4", "ISO-8859-4");
		s_revhash.put("8859_5", "ISO-8859-5");
		s_revhash.put("8859_6", "ISO-8859-6");
		s_revhash.put("8859_7", "ISO-8859-7");
		s_revhash.put("8859_8", "ISO-8859-8");
		s_revhash.put("8859_9", "ISO-8859-9");
		s_revhash.put("JIS", "ISO-2022-JP");
		s_revhash.put("SJIS", "Shift_JIS");
		s_revhash.put("EUCJIS", "EUC-JP");
		s_revhash.put("GB2312", "GB2312");
		s_revhash.put("BIG5", "Big5");
		s_revhash.put("KSC5601", "EUC-KR");
		s_revhash.put("ISO2022KR", "ISO-2022-KR");
		s_revhash.put("KOI8_R", "KOI8-R");

		// Several EBCDIC MIME names share one Java code page, so the reverse
		// mapping can only report one of them. This class has always
		// registered every alias in turn and let the last one win; only the
		// winning entries are listed here so the effective mapping is
		// explicit and unchanged:
		//   CP037 <- EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL (reported)
		//   CP277 <- EBCDIC-CP-DK, EBCDIC-CP-NO (reported)
		//   CP278 <- EBCDIC-CP-FI, EBCDIC-CP-SE (reported)
		//   CP870 <- EBCDIC-CP-ROECE, EBCDIC-CP-YU (reported)
		s_revhash.put("CP037", "EBCDIC-CP-NL");
		s_revhash.put("CP277", "EBCDIC-CP-NO");
		s_revhash.put("CP278", "EBCDIC-CP-SE");
		s_revhash.put("CP280", "EBCDIC-CP-IT");
		s_revhash.put("CP284", "EBCDIC-CP-ES");
		s_revhash.put("CP285", "EBCDIC-CP-GB");
		s_revhash.put("CP297", "EBCDIC-CP-FR");
		s_revhash.put("CP420", "EBCDIC-CP-AR1");
		s_revhash.put("CP424", "EBCDIC-CP-HE");
		s_revhash.put("CP500", "EBCDIC-CP-CH");
		s_revhash.put("CP870", "EBCDIC-CP-YU");
		s_revhash.put("CP871", "EBCDIC-CP-IS");
		s_revhash.put("CP918", "EBCDIC-CP-AR2");
	}

	/** Static utility class; not instantiable. */
	private MIME2Java() {
	}

	/**
	 * Convert a MIME charset name, also known as an XML encoding name, to a
	 * Java encoding name.
	 * <p>
	 * The lookup is case insensitive and independent of the default locale:
	 * the name is upper-cased with {@link Locale#ENGLISH} so that, for
	 * example, <code>iso-8859-1</code> is still recognised when the default
	 * locale is Turkish (where <code>i</code> would otherwise upper-case to a
	 * dotted capital I).
	 * 
	 * @param mimeCharsetName
	 *            Case insensitive MIME charset name:
	 *            <code>UTF-8, US-ASCII, ISO-8859-1,
	 *                          ISO-8859-2, ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6,
	 *                          ISO-8859-7, ISO-8859-8, ISO-8859-9, ISO-2022-JP, Shift_JIS,
	 *                          EUC-JP, GB2312, Big5, EUC-KR, ISO-2022-KR, KOI8-R,
	 *                          EBCDIC-CP-US, EBCDIC-CP-CA, EBCDIC-CP-NL, EBCDIC-CP-DK,
	 *                          EBCDIC-CP-NO, EBCDIC-CP-FI, EBCDIC-CP-SE, EBCDIC-CP-IT,
	 *                          EBCDIC-CP-ES, EBCDIC-CP-GB, EBCDIC-CP-FR, EBCDIC-CP-AR1,
	 *                          EBCDIC-CP-HE, EBCDIC-CP-CH, EBCDIC-CP-ROECE, EBCDIC-CP-YU,
	 *                          EBCDIC-CP-IS and EBCDIC-CP-AR2</code>.
	 *            May be <code>null</code>.
	 * @return Java encoding name, or <var>null</var> if
	 *         <var>mimeCharsetName</var> is unknown or <code>null</code>.
	 * @see #reverse
	 */
	public static String convert(String mimeCharsetName) {
		if (mimeCharsetName == null) {
			return null;
		}
		return s_enchash.get(mimeCharsetName.toUpperCase(Locale.ENGLISH));
	}

	/**
	 * Convert a Java encoding name to MIME charset name. Available values of
	 * <i>encoding</i> are "UTF8", "8859_1", "8859_2", "8859_3", "8859_4",
	 * "8859_5", "8859_6", "8859_7", "8859_8", "8859_9", "JIS", "SJIS",
	 * "EUCJIS", "GB2312", "BIG5", "KSC5601", "ISO2022KR", "KOI8_R", "CP037",
	 * "CP277", "CP278", "CP280", "CP284", "CP285", "CP297", "CP420", "CP424",
	 * "CP500", "CP870", "CP871" and "CP918".
	 * <p>
	 * The lookup is case insensitive and independent of the default locale
	 * (the name is upper-cased with {@link Locale#ENGLISH}). Where several
	 * MIME names map to the same Java encoding, a single fixed MIME name is
	 * returned: EBCDIC-CP-NL for CP037, EBCDIC-CP-NO for CP277, EBCDIC-CP-SE
	 * for CP278 and EBCDIC-CP-YU for CP870.
	 * 
	 * @param encoding
	 *            Case insensitive Java encoding name:
	 *            <code>UTF8, 8859_1, 8859_2, 8859_3,
	 *                      8859_4, 8859_5, 8859_6, 8859_7, 8859_8, 8859_9, JIS, SJIS, EUCJIS,
	 *                      GB2312, BIG5, KSC5601, ISO2022KR, KOI8_R, CP037, CP277, CP278,
	 *                      CP280, CP284, CP285, CP297, CP420, CP424, CP500, CP870, CP871
	 *                      and CP918</code>.
	 *            May be <code>null</code>.
	 * @return MIME charset name, or <var>null</var> if <var>encoding</var> is
	 *         unknown or <code>null</code>.
	 * @see #convert
	 */
	public static String reverse(String encoding) {
		if (encoding == null) {
			return null;
		}
		return s_revhash.get(encoding.toUpperCase(Locale.ENGLISH));
	}
}
